-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Reland: [GlobalISel] prevent G_UNMERGE_VALUES for vectors with different elements #144661
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…ents This commit prevents building a G_UNMERGE_VALUES instruction with different source and destination vector elements in LegalizationArtifactCombiner::ArtifactValueFinder::tryCombineMergeLike(), e.g.: `%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)` This LLVM defect was identified via the AMD Fuzzing project.
|
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Robert Imschweiler (ro-i) ChangesThis commit prevents building a G_UNMERGE_VALUES instruction with different source and destination vector elements in This LLVM defect was identified via the AMD Fuzzing project. Note: this originally was #133335, which I had to revert because of changes that have been made to main during the lifetime of that PR. They led to two test failures which did not show as merge conflicts but still made a rebase necessary. You can see the changes needed after rebasing the PR in the second commit of this PR. Full diff: https://github.com/llvm/llvm-project/pull/144661.diff 3 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 22f6a5fde546a..8f560c42082f9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -997,6 +997,7 @@ class LegalizationArtifactCombiner {
// Recognize UnmergeSrc that can be unmerged to DstTy directly.
// Types have to be either both vector or both non-vector types.
+ // In case of vector types, the scalar elements need to match.
// Merge-like opcodes are combined one at the time. First one creates new
// unmerge, following should use the same unmerge (builder performs CSE).
//
@@ -1005,7 +1006,9 @@ class LegalizationArtifactCombiner {
// %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
//
// %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
- if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
+ if (((!DstTy.isVector() && !UnmergeSrcTy.isVector()) ||
+ (DstTy.isVector() && UnmergeSrcTy.isVector() &&
+ DstTy.getScalarType() == UnmergeSrcTy.getScalarType())) &&
(Elt0UnmergeIdx % NumMIElts == 0) &&
getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
index 8134eb3ca2afc..51d0b225b2a27 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -6506,3 +6506,47 @@ entry:
%insert = insertelement <5 x double> %vec, double %val, i32 %idx
ret <5 x double> %insert
}
+
+; Found by fuzzer, reduced with llvm-reduce.
+define void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) {
+; GPRIDX-LABEL: insert_very_small_from_very_large:
+; GPRIDX: ; %bb.0: ; %bb
+; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; GPRIDX-NEXT: v_and_b32_e32 v0, 1, v0
+; GPRIDX-NEXT: v_lshlrev_b16_e32 v0, 1, v0
+; GPRIDX-NEXT: v_and_b32_e32 v0, 3, v0
+; GPRIDX-NEXT: flat_store_byte v[16:17], v0
+; GPRIDX-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: insert_very_small_from_very_large:
+; GFX10: ; %bb.0: ; %bb
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
+; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX10-NEXT: flat_store_byte v[16:17], v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: insert_very_small_from_very_large:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_lshrrev_b16 v0.l, 1, v0.l
+; GFX11-NEXT: v_and_b16 v0.l, v0.l, 1
+; GFX11-NEXT: v_lshlrev_b16 v0.l, 1, v0.l
+; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX11-NEXT: flat_store_b8 v[16:17], v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %a = bitcast <32 x i16> %L3 to i512
+ %b = trunc i512 %a to i8
+ %c = trunc i8 %b to i2
+ %d = bitcast i2 %c to <2 x i1>
+ %insert = insertelement <2 x i1> %d, i1 false, i32 0
+ store <2 x i1> %insert, ptr %ptr, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll
index fdc1dd6cce8e1..53b2542cf9a7e 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll
@@ -2166,14 +2166,14 @@ define <6 x i8> @load_v6i8(ptr addrspace(8) inreg %buf) {
; GISEL-LABEL: load_v6i8:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
+; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4
; GISEL-NEXT: s_waitcnt vmcnt(1)
-; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
-; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
; GISEL-NEXT: s_setpc_b64 s[30:31]
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
%ret = load <6 x i8>, ptr addrspace(7) %p
@@ -3630,10 +3630,10 @@ define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) {
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 glc
; GISEL-NEXT: s_waitcnt vmcnt(1)
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
-; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
; GISEL-NEXT: s_setpc_b64 s[30:31]
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
%ret = load volatile <6 x i8>, ptr addrspace(7) %p
|
This commit prevents building a G_UNMERGE_VALUES instruction with different source and destination vector elements in
LegalizationArtifactCombiner::ArtifactValueFinder::tryCombineMergeLike(), e.g.:%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)This LLVM defect was identified via the AMD Fuzzing project.
Note: this originally was #133335, which I had to revert because of changes that have been made to main during the lifetime of that PR. They led to two test failures which did not show as merge conflicts but still made a rebase necessary. You can see the changes needed after rebasing the PR in the second commit of this PR.